function [Y,ndx,dbg] = natsortfiles(X,rgx,varargin)
% Natural-order / alphanumeric sort of filenames or foldernames.
%
% (c) 2014-2021 Stephen Cobeldick
%
% Sorts text by character code and by number value. File/folder names, file
% extensions, and path directories (if supplied) are sorted separately to
% ensure that shorter names sort before longer names. For names without
% file extensions (i.e. foldernames, or filenames without extensions) use
% the 'noext' option. Use the 'xpath' option to ignore any filepath. Use
% the 'rmdot' option to remove the folder names "." and ".." from the array.
%
%%% Example:
% P = 'C:\SomeDir\SubDir';
% S = dir(fullfile(P,'*.txt'));
% S = natsortfiles(S);
% for k = 1:numel(S)
%     F = fullfile(P,S(k).name)
% end
%
%%% Syntax:
%  Y = natsortfiles(X)
%  Y = natsortfiles(X,rgx)
%  Y = natsortfiles(X,rgx,<options>)
% [Y,ndx,dbg] = natsortfiles(X,...)
%
% To sort the elements of a string/cell array use NATSORT (File Exchange 34464)
% To sort the rows of a string/cell array use NATSORTROWS (File Exchange 47433)
%
%% File Dependency %%
%
% NATSORTFILES requires the function NATSORT (File Exchange 34464). Extra
% optional arguments are passed directly to NATSORT. See NATSORT for case-
% sensitivity, sort direction, number substring matching, and other options.
%
%% Explanation %%
%
% Using SORT on filenames will sort any of char(0:45), including the
% printing characters ' !"#$%&''()*+,-', before the file extension
% separator character '.'. Therefore NATSORTFILES splits the file-name
% from the file-extension and sorts them separately. This ensures that
% shorter names come before longer names (just like a dictionary):
%
% >> F = {'test_new.m'; 'test-old.m'; 'test.m'};
% >> sort(F) % Note '-' sorts before '.':
% ans =
%     'test-old.m'
%     'test.m'
%     'test_new.m'
% >> natsortfiles(F) % Shorter names before longer:
% ans =
%     'test.m'
%     'test-old.m'
%     'test_new.m'
%
% Similarly the path separator character within filepaths can cause longer
% directory names to sort before shorter ones, as char(0:46)<'/' and
% char(0:91)<'\'. This example on a PC demonstrates why this matters:
%
% >> D = {'A1\B', 'A+/B', 'A/B1', 'A=/B', 'A\B0'};
% >> sort(D)
% ans =   'A+/B'  'A/B1'  'A1\B'  'A=/B'  'A\B0'
% >> natsortfiles(D)
% ans =   'A\B0'  'A/B1'  'A1\B'  'A+/B'  'A=/B'
%
% NATSORTFILES splits filepaths at each path separator character and sorts
% every level of the directory hierarchy separately, ensuring that shorter
% directory names sort before longer, regardless of the characters in the names.
% On a PC separators are '/' and '\' characters, on Mac and Linux '/' only.
%
%% Examples %%
%
% >> A = {'a2.txt', 'a10.txt', 'a1.txt'}
% >> sort(A)
% ans = 'a1.txt'  'a10.txt'  'a2.txt'
% >> natsortfiles(A)
% ans = 'a1.txt'  'a2.txt'  'a10.txt'
%
% >> B = {'test2.m'; 'test10-old.m'; 'test.m'; 'test10.m'; 'test1.m'};
% >> sort(B) % Wrong number order:
% ans =
%    'test.m'
%    'test1.m'
%    'test10-old.m'
%    'test10.m'
%    'test2.m'
% >> natsortfiles(B) % Shorter names before longer:
% ans =
%    'test.m'
%    'test1.m'
%    'test2.m'
%    'test10.m'
%    'test10-old.m'
%
%%% Directory Names:
% >> C = {'A2-old\test.m';'A10\test.m';'A2\test.m';'A1\test.m';'A1-archive.zip'};
% >> sort(C) % Wrong number order, and '-' sorts before '\':
% ans =
%    'A1-archive.zip'
%    'A10\test.m'
%    'A1\test.m'
%    'A2-old\test.m'
%    'A2\test.m'
% >> natsortfiles(C) % Shorter names before longer:
% ans =
%    'A1\test.m'
%    'A1-archive.zip'
%    'A2\test.m'
%    'A2-old\test.m'
%    'A10\test.m'
%
%% Input and Output Arguments %%
%
%%% Inputs (**=default):
% X   = Array of filenames or foldernames to be sorted. Can be the struct
%       returned by DIR, a string array, or a cell array of char row vectors.
% rgx = Optional regular expression to match number substrings.
%     = [] uses the default regular expression '\d+'** to match integers.
% <options> can be supplied in any order:
%     = 'rmdot' removes the names "." and ".." from the output array.
%     = 'noext' for foldernames, or filenames without extensions.
%     = 'xpath' sorts by name only, excluding any preceding path.
%     = all remaining options are passed directly to NATSORT.
%
%%% Outputs:
% Y   = Array <X> sorted into alphanumeric order.
% ndx = NumericMatrix, indices such that Y = X(ndx). The same size as <Y>.
% dbg = CellVectorOfCellArrays, each cell contains the debug cell array of
%       one level of the path heirarchy directory names, or filenames, or
%       file extensions. Helps debug the regular expression (see NATSORT).
%
% See also SORT NATSORT NATSORTROWS DIR FILEPARTS FULLFILE NEXTNAME CELLSTR REGEXP IREGEXP SSCANF

%% Input Wrangling %%
%
fun = @(c)cellfun('isclass',c,'char') & cellfun('size',c,1)<2 & cellfun('ndims',c)<3;
%
if isstruct(X)
	assert(isfield(X,'name'),...
		'SC:natsortfiles:X:StructMissingNameField',...
		'If first input <X> is a struct then it must have field <name>.')
	nmx = {X.name};
	assert(all(fun(nmx)),...
		'SC:natsortfiles:X:NameFieldInvalidType',...
		'First input <X> field <name> must contain only character row vectors.')
	[fpt,fnm,fxt] = cellfun(@fileparts, nmx, 'UniformOutput',false);
	if isfield(X,'folder')
		fpt = {X.folder};
		assert(all(fun(fpt)),...
			'SC:natsortfiles:X:FolderFieldInvalidType',...
			'First input <X> field <folder> must contain only character row vectors.')
	end
elseif iscell(X)
	assert(all(fun(X(:))),...
		'SC:natsortfiles:X:CellContentInvalidType',...
		'First input <X> cell array must contain only character row vectors.')
	[fpt,fnm,fxt] = cellfun(@fileparts, X(:), 'UniformOutput',false);
	nmx = strcat(fnm,fxt);
elseif ischar(X)
	[fpt,fnm,fxt] = cellfun(@fileparts, cellstr(X), 'UniformOutput',false);
	nmx = strcat(fnm,fxt);
else
	assert(isa(X,'string'),...
		'SC:natsortfiles:X:InvalidType',...
		'First input <X> must be a structure, a cell array, or a string array.');
	[fpt,fnm,fxt] = cellfun(@fileparts, cellstr(X(:)), 'UniformOutput',false);
	nmx = strcat(fnm,fxt);
end
%
varargin = cellfun(@nsf1s2c, varargin, 'UniformOutput',false);
%
assert(all(fun(varargin)),...
	'SC:natsortfiles:option:InvalidType',...
	'All optional arguments must be character row vectors or string scalars.')
%
idd = strcmpi(varargin,'rmdot');
assert(nnz(idd)<2,...
	'SC:natsortfiles:rmdot:Overspecified',...
	'The "." and ".." folder handling is overspecified.')
varargin(idd) = [];
%
ide = strcmpi(varargin,'noext');
assert(nnz(ide)<2,...
	'SC:natsortfiles:noext:Overspecified',...
	'The file-extension handling is overspecified.')
varargin(ide) = [];
%
idp = strcmpi(varargin,'xpath');
assert(nnz(idp)<2,...
	'SC:natsortfiles:xpath:Overspecified',...
	'The file-path handling is overspecified.')
varargin(idp) = [];
%
if nargin>1
	varargin = [{rgx},varargin];
end
%
%% Path and Extension %%
%
% Path separator regular expression:
if ispc()
	psr = '[^/\\]+';
else % Mac & Linux
	psr = '[^/]+';
end
%
if any(idd) % Remove "." and ".." folder names
	ddx = strcmp(nmx,'.')|strcmp(nmx,'..');
	fxt(ddx) = [];
	fnm(ddx) = [];
	fpt(ddx) = [];
	nmx(ddx) = [];
end
%
if any(ide) % No file-extension
	fnm = nmx;
	fxt = [];
end
%
if any(idp) % No file-path
	mat = reshape(fnm,1,[]);
else
	% Split path into {dir,subdir,subsubdir,...}:
	spl = regexp(fpt(:),psr,'match');
	nmn = 1+cellfun('length',spl(:));
	mxn = max(nmn);
	vec = 1:mxn;
	mat = cell(mxn,numel(nmn));
	mat(:) = {''};
	%mat(mxn,:) = fnm(:); % old behavior
	mat(logical(bsxfun(@eq,vec,nmn).')) =  fnm(:);  % TRANSPOSE bug loses type (R2013b)
	mat(logical(bsxfun(@lt,vec,nmn).')) = [spl{:}]; % TRANSPOSE bug loses type (R2013b)
end
%
if numel(fxt) % File-extension
	mat(end+1,:) = fxt(:);
end
%
%% Sort File Extensions, Names, and Paths %%
%
nmr = size(mat,1)*all(size(mat));
dbg = cell(1,nmr);
ndx = 1:numel(fnm);
%
for k = nmr:-1:1
	if nargout<3 % faster:
		[~,ids] = natsort(mat(k,ndx),varargin{:});
	else % for debugging:
		[~,ids,tmp] = natsort(mat(k,ndx),varargin{:});
		[~,idb] = sort(ndx);
		dbg{k} = tmp(idb,:);
	end
	ndx = ndx(ids);
end
%
% Return the sorted input array and corresponding indices:
%
if any(idd)
	tmp = find(~ddx);
	ndx = tmp(ndx);
end
%
ndx = ndx(:);
%
if ischar(X)
	Y = X(ndx,:);
elseif any(idd)
	xsz = size(X);
	nsd = xsz~=1;
	if nnz(nsd)==1 % vector
		xsz(nsd) = numel(ndx);
		ndx = reshape(ndx,xsz);
	end
	Y = X(ndx);
else
	ndx = reshape(ndx,size(X));
	Y = X(ndx);
end
%
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%natsortfiles
function arr = nsf1s2c(arr)
% If scalar string then extract the character vector, otherwise data is unchanged.
if isa(arr,'string') && isscalar(arr)
	arr = arr{1};
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%nsf1s2c